# Allowed file extensions (lower-case, without the leading dot).
# NOTE: the constant names are misspelled ("EXTENTISONS"); they are kept as-is
# because code elsewhere may reference them by these names.
ALLOWED_DOC_EXTENTISONS = {
    "docx",
    "pdf",
    "txt",
}
ALLOWED_IMAGE_EXTENTISONS = {
    "bmp",
    "gif",
    "jpeg",
    "jpg",
    "png",
    "svg",
}


# Check whether a filename has an allowed document file type.
# Only the text after the last dot counts, e.g. a_b_c.d.e.f.txt -> "txt".
def allowed_doc_file(filename):
    """Return True if ``filename`` ends in an allowed document extension.

    The extension is the text after the last "." and is compared
    case-insensitively against ``ALLOWED_DOC_EXTENTISONS``.

    Args:
        filename: The file name to check.

    Returns:
        True when the extension is allowed; False otherwise, including when
        ``filename`` is empty/None or contains no "." at all.
    """
    # Without this guard, rsplit(".", 1)[1] raises IndexError for names that
    # have no dot (e.g. "README" or an empty upload filename).
    if not filename or "." not in filename:
        return False
    return filename.rsplit(".", 1)[1].lower() in ALLOWED_DOC_EXTENTISONS


def allowed_image_file(filename):
    """Return True if ``filename`` ends in an allowed image extension.

    The extension is the text after the last "." and is compared
    case-insensitively against ``ALLOWED_IMAGE_EXTENTISONS``.

    Args:
        filename: The file name to check.

    Returns:
        True when the extension is allowed; False otherwise, including when
        ``filename`` is empty/None or contains no "." at all.
    """
    # Without this guard, rsplit(".", 1)[1] raises IndexError for names that
    # have no dot (e.g. "avatar" or an empty upload filename).
    if not filename or "." not in filename:
        return False
    return filename.rsplit(".", 1)[1].lower() in ALLOWED_IMAGE_EXTENTISONS


def extract_doc_content(file_path):
    """Return the text content of a .txt, .pdf or .docx file.

    The format is selected from the text after the last "." in
    ``file_path``, compared case-insensitively.

    Args:
        file_path: Path string of the document to read.

    Returns:
        The extracted text as one string. For txt the file is read as UTF-8;
        for pdf the text of all pages is concatenated; for docx the paragraph
        texts are joined with newlines.

    Raises:
        ValueError: If the extension is not txt, pdf or docx.
    """
    ext = file_path.rsplit(".", 1)[-1].lower()

    if ext == "txt":
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()

    if ext == "pdf":
        # Imported here so pdfplumber is only required when a PDF is read.
        import pdfplumber

        with pdfplumber.open(file_path) as pdf:
            # extract_text() can return None for a page with no extractable
            # text (depending on the pdfplumber version); treat that as "" so
            # the concatenation does not raise TypeError.
            return "".join(page.extract_text() or "" for page in pdf.pages)

    if ext == "docx":
        # Imported here so python-docx is only required when a DOCX is read.
        from docx import Document

        doc = Document(file_path)
        return "\n".join(para.text for para in doc.paragraphs)

    raise ValueError(f"不支持的文件类型:{ext}")
